import pandas as pd
import numpy as np
metro_df = pd.read_csv("/scratch/zwang/tmp/redfin_metro_market_tracker.tsv000", delimiter='\t')
metro_df.describe()
| period_duration | region_type_id | table_id | city | state | property_type_id | median_sale_price | median_sale_price_mom | median_sale_price_yoy | median_list_price | ... | sold_above_list | sold_above_list_mom | sold_above_list_yoy | price_drops | price_drops_mom | price_drops_yoy | off_market_in_two_weeks | off_market_in_two_weeks_mom | off_market_in_two_weeks_yoy | parent_metro_region_metro_code | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 243702.0 | 243702.0 | 243702.00000 | 0.0 | 0.0 | 243702.000000 | 2.432700e+05 | 224290.000000 | 218023.000000 | 2.275400e+05 | ... | 243087.000000 | 224093.000000 | 217774.000000 | 142085.000000 | 132403.000000 | 116606.000000 | 220545.000000 | 203646.000000 | 196641.000000 | 243702.00000 |
| mean | 30.0 | -2.0 | 30188.91170 | NaN | NaN | 4.137459 | 2.095753e+05 | 1.732001 | 2.162750 | 2.288758e+05 | ... | 0.182319 | 0.002564 | 0.027859 | 0.193195 | 0.001587 | 0.003566 | 0.265338 | 0.002672 | 0.031407 | 30188.91170 |
| std | 0.0 | 0.0 | 11607.33619 | NaN | NaN | 4.271163 | 1.901909e+05 | 499.981736 | 589.808332 | 2.078979e+05 | ... | 0.187989 | 0.171851 | 0.178994 | 0.114948 | 0.084396 | 0.095875 | 0.242623 | 0.194101 | 0.207323 | 11607.33619 |
| min | 30.0 | -2.0 | 10140.00000 | NaN | NaN | -1.000000 | 1.000000e+00 | -0.999996 | -0.999994 | 1.230000e+02 | ... | 0.000000 | -1.000000 | -1.000000 | 0.000043 | -0.933333 | -0.964286 | 0.000000 | -1.000000 | -1.000000 | 10140.00000 |
| 25% | 30.0 | -2.0 | 19740.00000 | NaN | NaN | -1.000000 | 1.190000e+05 | -0.075556 | -0.021675 | 1.299000e+05 | ... | 0.029851 | -0.033640 | -0.020833 | 0.125000 | -0.031904 | -0.035371 | 0.043478 | -0.050000 | -0.034799 | 19740.00000 |
| 50% | 30.0 | -2.0 | 30780.00000 | NaN | NaN | 4.000000 | 1.650000e+05 | 0.006535 | 0.077578 | 1.795000e+05 | ... | 0.142857 | 0.000000 | 0.007125 | 0.178404 | 0.001507 | 0.001375 | 0.229167 | 0.000000 | 0.006536 | 30780.00000 |
| 75% | 30.0 | -2.0 | 40420.00000 | NaN | NaN | 6.000000 | 2.417348e+05 | 0.096972 | 0.200734 | 2.600000e+05 | ... | 0.253383 | 0.039843 | 0.076923 | 0.239316 | 0.033931 | 0.038283 | 0.418919 | 0.055556 | 0.099883 | 40420.00000 |
| max | 30.0 | -2.0 | 49780.00000 | NaN | NaN | 13.000000 | 1.700000e+07 | 189999.000000 | 220999.000000 | 2.890000e+07 | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 0.925926 | 0.952381 | 1.000000 | 1.000000 | 1.000000 | 49780.00000 |
8 rows × 49 columns
# metro_df.info()
ny_df = metro_df[metro_df["region"] == 'New York, NY metro area'].sort_values(by='last_updated')
ny_df["period_begin"] = pd.to_datetime(ny_df["period_begin"])
ny_df = ny_df.sort_values(by="period_begin")
condo_df = ny_df[ny_df["property_type"] == "Condo/Co-op"]
import plotly.express as px
fig = px.line(ny_df, x="period_begin", y="median_sale_price", title='New York Metro Area median sale price by property type', color="property_type")
fig.show()
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_columns', None)
condo_df[condo_df["period_begin"] == "Jun 1, 2015"]
| period_begin | period_end | period_duration | region_type | region_type_id | table_id | is_seasonally_adjusted | region | city | state | state_code | property_type | property_type_id | median_sale_price | median_sale_price_mom | median_sale_price_yoy | median_list_price | median_list_price_mom | median_list_price_yoy | median_ppsf | median_ppsf_mom | median_ppsf_yoy | median_list_ppsf | median_list_ppsf_mom | median_list_ppsf_yoy | homes_sold | homes_sold_mom | homes_sold_yoy | pending_sales | pending_sales_mom | pending_sales_yoy | new_listings | new_listings_mom | new_listings_yoy | inventory | inventory_mom | inventory_yoy | months_of_supply | months_of_supply_mom | months_of_supply_yoy | median_dom | median_dom_mom | median_dom_yoy | avg_sale_to_list | avg_sale_to_list_mom | avg_sale_to_list_yoy | sold_above_list | sold_above_list_mom | sold_above_list_yoy | price_drops | price_drops_mom | price_drops_yoy | off_market_in_two_weeks | off_market_in_two_weeks_mom | off_market_in_two_weeks_yoy | parent_metro_region | parent_metro_region_metro_code | last_updated | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 140231 | 2015-06-01 | 2015-06-30 | 30 | metro | -2 | 35614 | f | New York, NY metro area | NaN | NaN | NY | Condo/Co-op | 3 | 475790.0 | 0.09377 | NaN | 489950.0 | 0.031474 | NaN | 532.374101 | 0.06219 | NaN | 510.204082 | -0.021088 | NaN | 2392.0 | 0.296477 | NaN | 1452.0 | 0.247423 | NaN | 3404.0 | -0.090812 | NaN | 18170.0 | -0.003674 | NaN | 7.6 | -2.3 | NaN | 110.0 | -2.0 | NaN | 0.982339 | 0.000358 | NaN | 0.099498 | 0.001937 | NaN | 0.096037 | 0.013568 | NaN | 0.092975 | -0.040186 | NaN | New York, NY | 35614 | 2022-04-10 14:38:16 |
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])
# Add traces
fig.add_trace(
go.Scatter(x=condo_df["period_begin"], y=condo_df["median_ppsf"], name="median ppsf"),
secondary_y=False,
)
fig.add_trace(
go.Scatter(x=condo_df["period_begin"], y=condo_df["homes_sold"], name="number of homes sold"),
secondary_y=True,
)
# Add figure title
fig.update_layout(
title_text="New York Metro Area condo median ppsf and homes sold"
)
# Set x-axis title
fig.update_xaxes(title_text="time")
# Set y-axes titles
fig.update_yaxes(title_text="median ppsf", secondary_y=False)
fig.update_yaxes(title_text="number of homes sold", secondary_y=True)
fig.show()
neighborhood_df = pd.read_csv("/scratch/zwang/tmp/neighborhood_market_tracker.tsv000", delimiter='\t')
nynj_df = neighborhood_df[(neighborhood_df['region'].str.contains("New York, NY")) | (neighborhood_df['region'].str.contains("Jersey City, NJ"))]
hudson_df = neighborhood_df[neighborhood_df['region'] == 'Jersey City, NJ - Hudson Exchange']
pd.set_option('display.max_rows', None)
# nynj_df[["median_ppsf", "region"]].groupby('region').count().sort_values("median_ppsf")
nynj_df.head()
| period_begin | period_end | period_duration | region_type | region_type_id | table_id | is_seasonally_adjusted | region | city | state | state_code | property_type | property_type_id | median_sale_price | median_sale_price_mom | median_sale_price_yoy | median_list_price | median_list_price_mom | median_list_price_yoy | median_ppsf | median_ppsf_mom | median_ppsf_yoy | median_list_ppsf | median_list_ppsf_mom | median_list_ppsf_yoy | homes_sold | homes_sold_mom | homes_sold_yoy | pending_sales | pending_sales_mom | pending_sales_yoy | new_listings | new_listings_mom | new_listings_yoy | inventory | inventory_mom | inventory_yoy | months_of_supply | months_of_supply_mom | months_of_supply_yoy | median_dom | median_dom_mom | median_dom_yoy | avg_sale_to_list | avg_sale_to_list_mom | avg_sale_to_list_yoy | sold_above_list | sold_above_list_mom | sold_above_list_yoy | price_drops | price_drops_mom | price_drops_yoy | off_market_in_two_weeks | off_market_in_two_weeks_mom | off_market_in_two_weeks_yoy | parent_metro_region | parent_metro_region_metro_code | last_updated | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 40 | 2016-08-01 | 2016-10-31 | 90 | neighborhood | 1 | 79695 | f | New York, NY - Travis-Chelsea | New York | New York | NY | Townhouse | 13 | 552500.0 | -0.047414 | 0.284884 | 699900.0 | NaN | 0.102206 | 228.982914 | -0.002539 | 0.389459 | 322.371032 | NaN | 0.505828 | 2.0 | -0.333333 | -0.333333 | 1.0 | NaN | NaN | 3.0 | NaN | 0.50000 | 2.0 | NaN | -0.333333 | NaN | NaN | NaN | 112.0 | 21.0 | 57.0 | 0.993795 | -0.002068 | 0.042616 | 0.500000 | 0.166667 | 0.500000 | NaN | NaN | NaN | 1.000000 | NaN | NaN | New York, NY | 35614 | 2022-04-10 14:38:16 |
| 421 | 2015-07-01 | 2015-09-30 | 90 | neighborhood | 1 | 92373 | f | New York, NY - East New York | New York | New York | NY | Single Family Residential | 6 | 330000.0 | -0.004525 | NaN | 379000.0 | -0.081212 | NaN | 286.458333 | 0.158708 | NaN | 254.975124 | -0.123106 | NaN | 5.0 | -0.166667 | NaN | 1.0 | 0.000000 | NaN | 13.0 | -0.071429 | NaN | 23.0 | 0.15000 | NaN | NaN | NaN | NaN | 63.0 | -80.5 | NaN | 0.971365 | -0.037389 | NaN | 0.200000 | -0.133333 | NaN | NaN | NaN | NaN | 1.000000 | 1.000000 | NaN | New York, NY | 35614 | 2022-04-10 14:38:16 |
| 458 | 2015-10-01 | 2015-12-31 | 90 | neighborhood | 1 | 32311 | f | New York, NY - College Point | New York | New York | NY | Multi-Family (2-4 Unit) | 4 | 735000.0 | 0.020833 | NaN | 853500.0 | -0.024571 | NaN | 362.963035 | -0.033157 | NaN | 354.155361 | -0.029374 | NaN | 21.0 | 0.000000 | NaN | 3.0 | -0.571429 | NaN | 26.0 | 0.130435 | NaN | 22.0 | 0.10000 | NaN | NaN | NaN | NaN | 37.0 | 10.0 | NaN | 0.959129 | -0.013225 | NaN | 0.142857 | 0.000000 | NaN | NaN | NaN | NaN | 0.000000 | -0.285714 | NaN | New York, NY | 35614 | 2022-04-10 14:38:16 |
| 472 | 2017-02-01 | 2017-04-30 | 90 | neighborhood | 1 | 79695 | f | New York, NY - Travis-Chelsea | New York | New York | NY | Townhouse | 13 | 637500.0 | 0.250000 | 0.333487 | 549000.0 | -0.155255 | -0.036842 | 316.220238 | 1.247024 | 0.573198 | 348.189415 | 0.080089 | 0.516731 | 2.0 | 1.000000 | 0.000000 | 1.0 | NaN | NaN | 3.0 | 2.000000 | 0.00000 | 1.0 | NaN | -0.500000 | NaN | NaN | NaN | 87.0 | 67.0 | -2841.5 | 0.980920 | 0.342622 | 0.004429 | 0.000000 | 0.000000 | 0.000000 | NaN | NaN | NaN | 1.000000 | NaN | NaN | New York, NY | 35614 | 2022-04-10 14:38:16 |
| 731 | 2020-11-01 | 2021-01-31 | 90 | neighborhood | 1 | 43188 | f | New York, NY - Northeastern Queens | New York | New York | NY | Single Family Residential | 6 | 840000.0 | 0.005988 | 0.005988 | 919000.0 | -0.003138 | 0.032069 | 526.116578 | 0.013843 | 0.079213 | 561.419975 | 0.009039 | -0.021421 | 467.0 | -0.012685 | 0.225722 | 70.0 | -0.369369 | 0.22807 | 376.0 | -0.189655 | 0.13253 | 494.0 | -0.05364 | -0.081784 | NaN | NaN | NaN | 53.0 | -4.0 | -35.0 | 0.954161 | 0.003086 | 0.005286 | 0.079229 | 0.005233 | 0.008363 | NaN | NaN | NaN | 0.171429 | 0.054311 | 0.048622 | New York, NY | 35614 | 2022-04-10 14:38:16 |
zipcode_df = pd.read_csv("/scratch/zwang/tmp/zip_code_market_tracker.tsv000", delimiter='\t')
zipcode_df["zipcode"] = zipcode_df["region"].str[-5:]
zipcode_nynj_df = zipcode_df[(zipcode_df['parent_metro_region'].str.contains("New York, NY"))]
zipcode_allres_df = zipcode_nynj_df[(zipcode_nynj_df["property_type"] == "All Residential")]
zipcode_allres_df.head()
| period_begin | period_end | period_duration | region_type | region_type_id | table_id | is_seasonally_adjusted | region | city | state | state_code | property_type | property_type_id | median_sale_price | median_sale_price_mom | median_sale_price_yoy | median_list_price | median_list_price_mom | median_list_price_yoy | median_ppsf | median_ppsf_mom | median_ppsf_yoy | median_list_ppsf | median_list_ppsf_mom | median_list_ppsf_yoy | homes_sold | homes_sold_mom | homes_sold_yoy | pending_sales | pending_sales_mom | pending_sales_yoy | new_listings | new_listings_mom | new_listings_yoy | inventory | inventory_mom | inventory_yoy | months_of_supply | months_of_supply_mom | months_of_supply_yoy | median_dom | median_dom_mom | median_dom_yoy | avg_sale_to_list | avg_sale_to_list_mom | avg_sale_to_list_yoy | sold_above_list | sold_above_list_mom | sold_above_list_yoy | price_drops | price_drops_mom | price_drops_yoy | off_market_in_two_weeks | off_market_in_two_weeks_mom | off_market_in_two_weeks_yoy | parent_metro_region | parent_metro_region_metro_code | last_updated | zipcode | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 188 | 2016-09-01 | 2016-11-30 | 90 | zip code | 2 | 3399 | f | Zip Code: 10280 | NaN | New York | NY | All Residential | -1 | 800000.0 | 0.013942 | 0.228879 | 770000.0 | -0.094118 | -0.094118 | 1187.683075 | -0.022071 | 0.283930 | 1173.494312 | 0.000000 | 0.039908 | 23.0 | 0.277778 | 0.045455 | 1.0 | NaN | 0.000000 | 26.0 | 0.040000 | 0.130435 | 51.0 | -0.135593 | 0.062500 | NaN | NaN | NaN | 128.0 | 1.5 | 3.5 | 0.987247 | -0.006180 | -0.005739 | 0.043478 | -0.012077 | -0.001976 | NaN | NaN | NaN | 0.000000 | NaN | 0.000000 | New York, NY | 35614 | 2022-04-10 14:38:16 | 10280 |
| 251 | 2020-01-01 | 2020-03-31 | 90 | zip code | 2 | 3637 | f | Zip Code: 11106 | NaN | New York | NY | All Residential | -1 | 582000.0 | -0.221405 | -0.090625 | 689000.0 | -0.008633 | -0.040056 | 604.347826 | -0.177166 | -0.357973 | 1043.283582 | 0.117477 | 0.331429 | 19.0 | -0.269231 | 0.266667 | 5.0 | 0.250000 | 0.666667 | 55.0 | 0.279070 | 0.447368 | 75.0 | 0.209677 | 0.415094 | NaN | NaN | NaN | 97.0 | -27.5 | 1.5 | 0.960271 | -0.013728 | -0.017741 | 0.000000 | 0.000000 | 0.000000 | NaN | NaN | NaN | 0.200000 | -0.050000 | -0.133333 | New York, NY | 35614 | 2022-04-10 14:38:16 | 11106 |
| 350 | 2021-06-01 | 2021-08-31 | 90 | zip code | 2 | 2531 | f | Zip Code: 07024 | NaN | New Jersey | NJ | All Residential | -1 | 362000.0 | 0.057706 | 0.064706 | 330000.0 | 0.050955 | -0.080780 | 333.333333 | -0.003344 | 0.144304 | 364.324582 | 0.012293 | 0.064736 | 210.0 | 0.039604 | 1.100000 | 30.0 | 0.153846 | 0.500000 | 265.0 | -0.043321 | -0.131148 | 433.0 | -0.052516 | -0.039911 | NaN | NaN | NaN | 128.0 | 5.5 | -25.5 | 0.966563 | 0.001500 | 0.017561 | 0.128571 | -0.005092 | 0.058571 | NaN | NaN | NaN | 0.066667 | -0.048718 | -0.033333 | New York, NY | 35614 | 2022-04-10 14:38:16 | 07024 |
| 740 | 2020-01-01 | 2020-03-31 | 90 | zip code | 2 | 2658 | f | Zip Code: 07432 | NaN | New Jersey | NJ | All Residential | -1 | 440000.0 | 0.035294 | -0.027624 | 474500.0 | 0.078409 | 0.056793 | 255.518464 | -0.096337 | -0.060364 | 262.676462 | -0.039398 | -0.068374 | 12.0 | -0.200000 | -0.250000 | 1.0 | -0.666667 | -0.750000 | 24.0 | 0.411765 | 0.142857 | 24.0 | 0.263158 | 0.411765 | NaN | NaN | NaN | 105.5 | 12.5 | 44.5 | 1.002136 | 0.011914 | 0.023947 | 0.416667 | 0.216667 | 0.229167 | NaN | NaN | NaN | 0.000000 | -0.333333 | 0.000000 | New York, NY | 35614 | 2022-04-10 14:38:16 | 07432 |
| 751 | 2021-05-01 | 2021-07-31 | 90 | zip code | 2 | 2695 | f | Zip Code: 07512 | NaN | New Jersey | NJ | All Residential | -1 | 508000.0 | 0.000000 | 0.239024 | 472450.0 | -0.004215 | 0.061806 | 268.854749 | -0.039646 | 0.098124 | 283.170737 | 0.009542 | 0.148932 | 36.0 | 0.058824 | 0.333333 | 15.0 | 1.142857 | 0.250000 | 52.0 | 0.130435 | 0.083333 | 33.0 | -0.083333 | -0.108108 | NaN | NaN | NaN | 41.5 | 7.5 | -36.5 | 1.010654 | -0.002012 | -0.003013 | 0.444444 | 0.032680 | -0.074074 | NaN | NaN | NaN | 0.066667 | -0.076190 | -0.516667 | New York, NY | 35614 | 2022-04-10 14:38:16 | 07512 |
import json
nycgeojson = {}
with open("/scratch/zwang/tmp/ny.geojson", "r") as rfile:
nycgeojson = json.loads(rfile.read())
njgeojson = {}
with open("/scratch/zwang/tmp/nj.geojson", "r") as rfile:
njgeojson = json.loads(rfile.read())
for area in njgeojson["features"]:
area["properties"]["postalCode"] = area["properties"]["GEOID10"]
nynj_geojson = nycgeojson
nynj_geojson["features"] += njgeojson["features"]
latest_df = zipcode_allres_df[zipcode_allres_df['period_begin'] == '2022-01-01']
fig = px.choropleth(
latest_df,
geojson=nynj_geojson,
locations='zipcode',
color='median_ppsf',
color_continuous_scale="Reds",
featureidkey='properties.postalCode',
range_color=(500, 2500),
)
fig.update_geos(fitbounds="locations", visible=False)
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0}, title="NY / NJ median ppsf by zipcode 2022-01-01",)
fig.show()
%bbin
Adding sitedir: /mnt/scooter/bbin/20220429.19.10.11/pyimports
import plotly
plotly.offline.init_notebook_mode()
sharenb()
Original notebook path: '/mnt/shapurefb04_scratcht/zwang/notebooks/redfin.ipynb' Notebook as permanent ipynb: '/home/zwang/public_html/permanent_notebooks/20220501_redfin.ipynb' Notebook as permanent html: https://web.hudson-trading.com/~zwang/permanent_notebooks/20220501_redfin.html